import torch
import torch.nn as nn
import torch.fft


class Model(nn.Module):
    """
    Multi-scale convolutional forecaster.

    For every configured scale ``i`` the input series is passed through a
    strided ``Conv1d`` (kernel and stride ``conv_kernel[i]``) and then a
    dilated ``Conv1d`` that produces ``t_model`` channels; the first
    ``period[i] // conv_kernel[i]`` output steps of each scale are kept and
    concatenated.  The concatenated features are gated per variable by a
    learnable ``sigmoid(nodevec)`` and projected to ``pred_len`` steps with a
    single linear layer.  The last observed value of each variable is
    subtracted before and added back after the network.

    The module never pins itself to a device: all parameters (including
    ``nodevec``) follow ``model.to(...)`` and ``forward`` works on whatever
    device/dtype the input and parameters share.
    """

    def __init__(self, configs):
        """
        Build the layers from ``configs``.

        Attributes read from ``configs``: ``seq_len``, ``pred_len``,
        ``enc_in``, ``c_out``, ``d_model``, ``n_model``, ``t_model``,
        ``conv_kernel``, ``sconv_kernel``, ``period``, ``output_attention``,
        ``d_layers`` and ``top_k``.
        """
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.enc_in = configs.enc_in
        self.c_out = configs.c_out
        self.d_model = configs.d_model
        self.n_model = configs.n_model
        self.t_model = configs.t_model
        # One entry per scale; each entry is used as an int
        # (kernel size / stride, and divisor of period[i]).
        self.conv_kernel = configs.conv_kernel
        self.sconv_kernel = configs.sconv_kernel
        self.sample_kernel = [14, 14, 14, 14]
        # One entry per scale; each entry is used as an int
        # (period[i] // conv_kernel[i], seq_len // period[i]).
        self.period = configs.period
        self.output_attention = configs.output_attention
        self.d_layers = configs.d_layers
        self.dropout = 0.05
        self.k = configs.top_k

        # Number of time steps kept from each scale, and their total.
        self._segment_lens = [
            self.period[i] // kernel
            for i, kernel in enumerate(self.conv_kernel)
        ]
        total_len = sum(self._segment_lens)
        self._total_len = total_len

        self.Linear = nn.Linear(total_len * self.t_model, self.pred_len)

        # Per-variable, per-step gate logits.  Registered as a plain
        # Parameter (no explicit device) so it moves with model.to(...).
        self.nodevec = nn.Parameter(torch.randn(self.enc_in, total_len))

        self.conv1 = nn.ModuleList()
        self.conv2 = nn.ModuleList()
        self.sample_conv = nn.ModuleList()
        self.projection_sample = nn.ModuleList()
        self.projection_s = nn.ModuleList()

        for i, kernel in enumerate(self.conv_kernel):
            seg_len = self._segment_lens[i]
            # Downsampling convolution: padding = kernel // 2, stride = kernel.
            self.conv1.append(
                nn.Conv1d(
                    in_channels=1,
                    out_channels=1,
                    kernel_size=kernel,
                    padding=kernel // 2,
                    stride=kernel,
                )
            )
            # Dilated convolution whose taps are seg_len steps apart.
            self.sample_conv.append(
                nn.Conv1d(
                    in_channels=1,
                    out_channels=self.t_model,
                    dilation=seg_len,
                    kernel_size=self.seq_len // self.period[i],
                    padding=0,
                    stride=1,
                )
            )

    @property
    def device(self):
        """Device the model's parameters currently live on."""
        return self.nodevec.device

    def forward(self, x, x_mark_dec, y, y_mark_enc):
        """
        Forecast ``pred_len`` steps for every variable.

        Args:
            x: input series; it is permuted and reshaped as
                ``[B, seq_len, enc_in]``.
            x_mark_dec, y, y_mark_enc: accepted for interface compatibility;
                not used by this model.

        Returns:
            Tensor of shape ``[B, pred_len, enc_in]``.
        """
        # Remove the last observed value per variable; restored at the end.
        seq_last = x[:, -1:, :].detach()
        x = x - seq_last

        # Treat every variable as an independent single-channel series.
        x_enc = x.permute(0, 2, 1).reshape(-1, self.seq_len).unsqueeze(1)
        # x_enc: [B * enc_in, 1, seq_len]

        segments = []
        for seg_len, down_conv, dilated_conv in zip(
            self._segment_lens, self.conv1, self.sample_conv
        ):
            features = dilated_conv(down_conv(x_enc))
            # Keep only the first seg_len steps of this scale.
            segments.append(features[:, :, :seg_len])

        # Single concatenation on the input's own device/dtype.
        n_sample = torch.cat(segments, dim=-1)  # [B * enc_in, t_model, total]

        total_len = self._total_len
        dec_out = n_sample.permute(0, 2, 1).reshape(
            -1, self.enc_in, total_len, self.t_model
        )  # [B, enc_in, total, t_model]

        # Gate broadcast over batch and feature dims: [1, enc_in, total, 1].
        gate = torch.sigmoid(self.nodevec).unsqueeze(dim=0).unsqueeze(dim=-1)
        dec_out = dec_out * gate

        flat = dec_out.reshape(-1, self.enc_in, total_len * self.t_model)
        dec = self.Linear(flat).permute(0, 2, 1)  # [B, pred_len, enc_in]
        return dec + seq_last